#!pip install pystan fbprophet
#!pip install pmdarima
import pandas as pd
from fbprophet import Prophet
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
# import data & convert datetime column
df = pd.read_csv("Historic_GRPs_2021-09-15.csv")
df['date'] = pd.to_datetime(df['Date'])
del df['Date']
df['year'] = df['date'].dt.year
df['month'] = df['date'].dt.month
df.head()
co = df.loc[:, df.columns.isin(
['date','year', 'AMSTEL BEER','ANGRY ORCHARD BEVERAGES-ALCOHOLIC HARD CIDER', 'ATHLETIC BREWING BEER',
'BALLAST POINT BREWING CO BEER', 'BLUE MOON BEER', 'CAPE LINE BEVERAGES-ALCOHOLIC',
'CERVEZA SOL', 'COORS BREWING BEER', 'COORS HARD SELTZER', 'COORS LIGHT BEER',
'CORONA EXTRA BEER', 'CORONA HARD SELTZER', 'CORONA LIGHT BEER','CORONA PREMIER BEER', 'CORONA REFRESCA',
'CRISPIN BEVERAGES-ALCOHOLIC', 'DESCHUTES BREWERY BEER',
'DOS EQUIS BEER', 'FIRESTONE WALKER BEER', 'FOUR LOCO BEVERAGES-ALCOHOLIC', 'FOUR LOCO CAFFEINATED BEVERAGES-ALCOHOLIC',
'GOLD BUCKLE BEER','GUINESS COLD BREW', 'GUINNESS BEER', 'GUINNESS BLONDE AMERICAN LAGER BEER', 'GUINNESS NITRO IPA',
'HEINEKEN BEER', 'HEINEKEN LIGHT BEER',
'HENRYS HARD SODA', 'HIGH NOON SUN SIPS', 'LEINENKUGELS BEER', 'MICKEY\'S FINE MALT LIQUOR',
'MIKES BEVERAGES-ALCOHOLIC MALT COCKTAIL', 'MIKES HARD LEMONADE', 'MIKES HARD LEMONADE SELTZER',
'MILLER BREWING BEER', 'MILLER HIGH LIFE BEER', 'MILLER LITE BEER',
'MODELO ESPECIAL BEER', 'MODELO ESPECIAL CHELADA BEER', 'PABST BEER',
'PALM BREEZE BEVERAGES-ALCOHOLIC', 'PERONI BEER','SAINT ARCHER BREWING', 'SAMUEL ADAMS BEER',
'SAUCY BREW WORKS BEER', 'SEAGRAMS ESCAPES BEVERAGES-ALCOHOLIC', 'SHINER BEER', 'SHIPYARD BEER ISLAND TIME',
'SIERRA NEVADA BEER', 'SMIRNOFF HARD SELTZER', 'SMIRNOFF ICE BEVERAGES-ALCOHOLIC', 'SMITH & FORGE BEVERAGES-ALCOHOLIC HARD CIDER',
'STRONGBOW BEVERAGES-ALCOHOLIC HARD CIDER', 'TECATE BEER', 'TECATE LIGHT BEER', 'TRAVELER BEER',
'TRULY SPIKED AND SPARKLING ALCOHOLIC BEVERAGE', 'TWISTED TEA BEVERAGES-ALCOHOLIC MALT LIQUOR', 'VIZZY HARD SELTZER',
'WHITE CLAW HARD SELTZER', 'WHITE CLAW HARD SELTZER ICED TEA'])]
ab = df.loc[:, ~df.columns.isin(
['AMSTEL BEER','ANGRY ORCHARD BEVERAGES-ALCOHOLIC HARD CIDER', 'ATHLETIC BREWING BEER',
'BALLAST POINT BREWING CO BEER', 'BLUE MOON BEER', 'CAPE LINE BEVERAGES-ALCOHOLIC',
'CERVEZA SOL', 'COORS BREWING BEER', 'COORS HARD SELTZER', 'COORS LIGHT BEER',
'CORONA EXTRA BEER', 'CORONA HARD SELTZER', 'CORONA LIGHT BEER','CORONA PREMIER BEER', 'CORONA REFRESCA',
'CRISPIN BEVERAGES-ALCOHOLIC', 'DESCHUTES BREWERY BEER',
'DOS EQUIS BEER', 'FIRESTONE WALKER BEER', 'FOUR LOCO BEVERAGES-ALCOHOLIC', 'FOUR LOCO CAFFEINATED BEVERAGES-ALCOHOLIC',
'GOLD BUCKLE BEER','GUINESS COLD BREW', 'GUINNESS BEER', 'GUINNESS BLONDE AMERICAN LAGER BEER', 'GUINNESS NITRO IPA',
'HEINEKEN BEER', 'HEINEKEN LIGHT BEER',
'HENRYS HARD SODA', 'HIGH NOON SUN SIPS', 'LEINENKUGELS BEER', 'MICKEY\'S FINE MALT LIQUOR',
'MIKES BEVERAGES-ALCOHOLIC MALT COCKTAIL', 'MIKES HARD LEMONADE', 'MIKES HARD LEMONADE SELTZER',
'MILLER BREWING BEER', 'MILLER HIGH LIFE BEER', 'MILLER LITE BEER',
'MODELO ESPECIAL BEER', 'MODELO ESPECIAL CHELADA BEER', 'PABST BEER',
'PALM BREEZE BEVERAGES-ALCOHOLIC', 'PERONI BEER','SAINT ARCHER BREWING', 'SAMUEL ADAMS BEER',
'SAUCY BREW WORKS BEER', 'SEAGRAMS ESCAPES BEVERAGES-ALCOHOLIC', 'SHINER BEER', 'SHIPYARD BEER ISLAND TIME',
'SIERRA NEVADA BEER', 'SMIRNOFF HARD SELTZER', 'SMIRNOFF ICE BEVERAGES-ALCOHOLIC', 'SMITH & FORGE BEVERAGES-ALCOHOLIC HARD CIDER',
'STRONGBOW BEVERAGES-ALCOHOLIC HARD CIDER', 'TECATE BEER', 'TECATE LIGHT BEER', 'TRAVELER BEER',
'TRULY SPIKED AND SPARKLING ALCOHOLIC BEVERAGE', 'TWISTED TEA BEVERAGES-ALCOHOLIC MALT LIQUOR', 'VIZZY HARD SELTZER',
'WHITE CLAW HARD SELTZER', 'WHITE CLAW HARD SELTZER ICED TEA'])]
print(f"Number of rows: {co.shape[0]}\nNumber of columns: {len(list(co.columns))}\nFile size in bytes: {co.size}")
# grab summary statistics for entire data frame (can be subset)
co.describe()
# create long version dataframe
df_stacked = co.copy()
del df_stacked['date']
frames = []
for col in list(df_stacked.columns):
col_values = list(df[col])
col_names = [col] * len(col_values)
frames.append(pd.DataFrame(list(zip(col_values,col_names)),columns=['GRP','Brand']))
df_long = pd.concat(frames)
# https://plotly.com/python/box-plots/
fig = px.box(
df_long,
x="Brand",
y="GRP",
width=1200,
height=400,
title="GRP Spend by Beverage Brand")
fig.update_layout({
'plot_bgcolor': 'rgba(0, 0, 0, 0)',
'paper_bgcolor': 'rgba(0, 0, 0, 0)',
})
fig.show()
#fig.show(renderer='browser')
comp = co.loc[:, ~co.columns.isin(
['COORS BREWING BEER',
'DESCHUTES BREWERY BEER',
'DOS EQUIS BEER',
'FIRESTONE WALKER BEER',
'GUINESS COLD BREW',
'GUINNESS BEER',
'HIGH NOON SUN SIPS',
'KONA BREWING CO BEER',
'LAGUNITAS BREWING CO BEER',
'LEINENKUGELS BEER',
'MILLER BREWING BEER',
'MODELO ESPECIAL CHELADA BEER',
'MONTEJO BEER',
'OLD BLUE LAST BEER',
'RITAS FAMILY',
'SAINT ARCHER BREWING'])]
feature_frames = []
for i in range(comp.shape[0]):
for col in list(comp.columns):
for year in list(comp['year'].unique()):
comp_sub = comp[comp['year'] == year]
try:
internal_prop = comp_sub[col][i] / comp_sub[col].sum()
external_prop = comp_sub[col][i] / comp_sub.sum(axis=1)[i]
grp = comp_sub[col][i]
annual_prop = comp[col].sum() / comp_sub[col].sum()
z_score = (comp_sub[col][i] / comp_sub[col].mean()) / comp_sub[col].std()
mu = comp_sub[col].mean()
std = comp_sub[col].std()
count = len([i for i in list(comp_sub[col]) if i > 0])
features = [col,internal_prop,external_prop, grp, z_score, mu, std, count, str(comp_sub['date'][i])]
feature_frames.append(features)
except:
pass
#Create the pandas DataFrame
cluster_df = pd.DataFrame(
feature_frames,
columns = ['brand','internal_prop','external_prop','grp', 'z_score', 'mu', 'std', 'count', 'date']).fillna(0)
cluster_df['year'] = cluster_df['date'].str[0:4]
cluster_df['month'] = cluster_df['date'].str[5:7]
seltzer=['BON VIV SPIKED SELTZER','CACTI AGAVE SPIKED SELTZER','COORS HARD SELTZER','CORONA HARD SELTZER',
'MIKES HARD LEMONADE SELTZER', 'SMIRNOFF HARD SELTZER','VIZZY HARD SELTZER','WHITE CLAW HARD SELTZER',
'WHITE CLAW HARD SELTZER ICED TEA', 'TRULY SPIKED AND SPARKLING ALCOHOLIC BEVERAGE', 'CAPE LINE BEVERAGES-ALCOHOLIC']
malt = ["MICKEY'S FINE MALT LIQUOR", 'CORONA REFRESCA', 'FOUR LOCO BEVERAGES-ALCOHOLIC', 'FOUR LOCO CAFFEINATED BEVERAGES-ALCOHOLIC',
'GUINNESS NITRO IPA', 'HENRYS HARD SODA', 'MIKES BEVERAGES-ALCOHOLIC MALT COCKTAIL', 'MIKES HARD LEMONADE',
'PALM BREEZE BEVERAGES-ALCOHOLIC', 'SMIRNOFF ICE BEVERAGES-ALCOHOLIC', 'TWISTED TEA BEVERAGES-ALCOHOLIC MALT LIQUOR']
ale = ['ATHLETIC BREWING BEER', 'BALLAST POINT BREWING CO BEER', 'BLUE MOON BEER', 'SAUCY BREW WORKS BEER',
'SHIPYARD BEER ISLAND TIME', 'SIERRA NEVADA BEER', 'TRAVELER BEER']
hard_cider = ['ANGRY ORCHARD BEVERAGES-ALCOHOLIC HARD CIDER', 'CRISPIN BEVERAGES-ALCOHOLIC',
'SMITH & FORGE BEVERAGES-ALCOHOLIC HARD CIDER', 'STRONGBOW BEVERAGES-ALCOHOLIC HARD CIDER']
lager = ['AMSTEL BEER', 'CERVEZA SOL', 'CORONA EXTRA BEER', 'CORONA LIGHT BEER', 'GOLD BUCKLE BEER','GUINNESS BLONDE AMERICAN LAGER BEER',
'HEINEKEN BEER', 'MILLER HIGH LIFE BEER', 'MODELO ESPECIAL BEER', 'PABST BEER', 'PERONI BEER', 'SAMUEL ADAMS BEER',
'SHINER BEER', 'TECATE BEER']
light_beer = ['COORS LIGHT BEER', 'CORONA PREMIER BEER', 'HEINEKEN LIGHT BEER', 'MILLER LITE BEER', 'SEAGRAMS ESCAPES BEVERAGES-ALCOHOLIC',
'TECATE LIGHT BEER']
a = cluster_df['brand'].to_list()
# classify beer_type
beer_type = []
for i in a:
if i in seltzer:
beer_type.append('seltzer')
elif i in malt:
beer_type.append('malt')
elif i in ale:
beer_type.append('ale')
elif i in hard_cider:
beer_type.append('hard_cider')
elif i in lager:
beer_type.append('lager')
elif i in light_beer:
beer_type.append('light_beer')
else:
beer_type.append('beer')
# create beer_type column
cluster_df['beer_type'] = beer_type
Boston_Beer_Company =['ANGRY ORCHARD BEVERAGES-ALCOHOLIC HARD CIDER', 'SAMUEL ADAMS BEER', 'TRAVELER BEER',
'TRULY SPIKED AND SPARKLING ALCOHOLIC BEVERAGE', 'TWISTED TEA BEVERAGES-ALCOHOLIC MALT LIQUOR']
Constellation_Brands = ['CORONA EXTRA BEER', 'CORONA HARD SELTZER', 'CORONA LIGHT BEER', 'CORONA PREMIER BEER',
'CORONA REFRESCA', 'MODELO ESPECIAL BEER', 'MODELO ESPECIAL CHELADA BEER']
Diageo = ['GUINESS COLD BREW', 'GUINNESS BEER', 'GUINNESS BLONDE AMERICAN LAGER BEER',
'GUINNESS NITRO IPA', 'SEAGRAMS ESCAPES BEVERAGES-ALCOHOLIC']
Heineken = ['AMSTEL BEER', 'DOS EQUIS BEER', 'HEINEKEN BEER', 'HEINEKEN LIGHT BEER', 'SMIRNOFF HARD SELTZER',
'SMIRNOFF ICE BEVERAGES-ALCOHOLIC', 'STRONGBOW BEVERAGES-ALCOHOLIC HARD CIDER', 'TECATE BEER', 'TECATE LIGHT BEER']
Mark_Anthony_Group = ['MIKES BEVERAGES-ALCOHOLIC MALT COCKTAIL', 'MIKES HARD LEMONADE SELTZER', 'MIKES HARD LEMONADE', 'PALM BREEZE BEVERAGES-ALCOHOLIC',
'WHITE CLAW HARD SELTZER ICED TEA', 'WHITE CLAW HARD SELTZER']
Molson_Coors = ["MICKEY'S FINE MALT LIQUOR", 'BLUE MOON BEER', 'CAPE LINE BEVERAGES-ALCOHOLIC',
'CERVEZA SOL', 'COORS BREWING BEER', 'COORS HARD SELTZER', 'COORS LIGHT BEER', 'CRISPIN BEVERAGES-ALCOHOLIC',
'LEINENKUGELS BEER', 'MILLER BREWING BEER', 'MILLER HIGH LIFE BEER', 'MILLER LITE BEER', 'PERONI BEER', 'SAINT ARCHER BREWING',
'SMITH & FORGE BEVERAGES-ALCOHOLIC HARD CIDER', 'VIZZY HARD SELTZER']
Phusion_Projects = ['FOUR LOCO BEVERAGES-ALCOHOLIC', 'FOUR LOCO CAFFEINATED BEVERAGES-ALCOHOLIC']
# classify brewery
brewery = []
for i in a:
if i in Boston_Beer_Company:
brewery.append('Boston Beer Company')
elif i in Constellation_Brands:
brewery.append('Constellation Brands')
elif i in Diageo:
brewery.append('Diageo')
elif i in Heineken:
brewery.append('Heineken')
elif i in Mark_Anthony_Group:
brewery.append('Mark Anthony Group')
elif i in Molson_Coors:
brewery.append('Molson Coors')
elif i in Phusion_Projects:
brewery.append('Phusion Projects')
else:
brewery.append('others')
# create brewery column
cluster_df['brewery'] = brewery
cluster_df = cluster_df.loc[(cluster_df["brand"] != 'year') | (cluster_df["brand"] != 'month')]
pd.set_option('display.max_rows', 10)
final = cluster_df[cluster_df['beer_type'] != 'beer']
final.head()
final1 = final[['brand', 'grp', 'date', 'beer_type']]
c = final1.groupby(['beer_type','date']).sum()
c = c.reset_index()
c.head()
d = c.groupby('date').sum()
d = d.rename(columns={"grp": "grp_sum"})
d.head()
# join c and d table
e = pd.merge(c,d, on='date', how='left')
e.head()
# create a percentage column
e['percentage'] = e['grp'] * 100 / e['grp_sum']
e.head()
# Extract brand, grp, date, year, month, beer_type, brewery columns from final dataframe
f = final[['brand', 'grp', 'date','year', 'month', 'beer_type', 'brewery']]
f.head()
# Create quarter list using month and year column
quarter = []
for year in list(f['year'].unique()):
sub = f[f['year'] == year]
for i in sub['month']:
if int(i) < 4:
quarter.append(year + 'Q1')
elif (int(i) > 3) & (int(i) < 7):
quarter.append(year + 'Q2')
elif (int(i) > 6) & (int(i) < 10):
quarter.append(year + 'Q3')
else:
quarter.append(year + 'Q4')
len(quarter)
import datetime as dt
import pandas as pd
f['quarter'] = quarter
f.head()
g = f.groupby(['quarter', 'beer_type']).sum()
g = g.reset_index()
g.head()
n = f.groupby(['quarter']).sum()
n = n.reset_index()
n.head()
p = pd.merge(n,g, on='quarter', how='left')
p.head()
p['percentage'] = p['grp_y'] * 100 / p['grp_x']
p.head()
# Quarterly SOV by beer_type
import plotly.graph_objects as go
fig = px.bar(p, y='percentage', x='quarter',text='percentage', color='beer_type',
color_discrete_map={
'seltzer': 'red',
'lager': 'royalblue',
'malt': 'purple',
'light_beer': 'mediumseagreen',
'hard_cider': 'orange'})
fig.update_traces(texttemplate='%{text:.2s}', textposition='inside')
fig.show()
# Quarterly GRP_sum by beer_type
import plotly.graph_objects as go
fig = px.bar(g, y='grp', x='quarter', color='beer_type')
fig.show()
# Quarterly GRP_sum by seltzer
import plotly.graph_objects as go
fig = px.bar(g[g['beer_type'] == 'seltzer'], y='grp', x='quarter', color='beer_type')
fig.show()
# Quarterly GRP_sum by malt
import plotly.graph_objects as go
fig = px.bar(g[g['beer_type'] == 'malt'], y='grp', x='quarter', color='beer_type')
fig.show()
g = f.groupby(['quarter', 'brewery']).sum()
g = g.reset_index()
g.head()
l = f.groupby(['quarter']).sum()
l = l.reset_index()
l.head()
m = pd.merge(l,g, on='quarter', how='left')
m.head()
m['percentage'] = m['grp_y'] * 100 / m['grp_x']
# Quarterly percentage by brewery
import plotly.graph_objects as go
fig = px.bar(m, y='percentage', x='quarter',text='percentage', color='brewery')
fig.update_traces(texttemplate='%{text:.2s}', textposition='inside')
fig.update_layout(title_text='Quarterly SOV trends')
fig.show()
# Quarterly grp_sum by brewery
import plotly.graph_objects as go
fig = px.bar(g, y='grp', x='quarter',text='grp', color='brewery')
fig.update_traces(texttemplate='%{text:.2s}', textposition='inside')
fig.show()
g1 = f.groupby(['year', 'beer_type']).sum()
g1 = g1.reset_index()
# Yearly grp_sum by beer_type
import plotly.graph_objects as go
fig = px.bar(g1, y='grp', x='year', color='beer_type')
fig.show()
# Yearly grp_sum by seltzer
import plotly.graph_objects as go
fig = px.bar(g1[g1['beer_type'] == 'seltzer'], y='grp', x='year', color='beer_type')
fig.show()
# Yearly grp_sum by malt
import plotly.graph_objects as go
fig = px.bar(g1[g1['beer_type'] == 'malt'], y='grp', x='year', color='beer_type')
fig.show()
g1 = f.groupby(['year', 'brewery']).sum()
g1 = g1.reset_index()
# Yearly grp_sum by brewery
import plotly.graph_objects as go
fig = px.bar(g1, y='grp', x='year', color='brewery')
fig.show()
import plotly.graph_objects as go
fig = px.bar(e, y='percentage', x='date', color='beer_type',
color_discrete_map={
'seltzer': 'red',
'lager': 'royalblue',
'malt': 'purple',
'light_beer': 'mediumseagreen',
'hard_cider': 'orange'})
fig.update_layout(title_text='SOV Trends')
fig.show()
cluster_df1 = cluster_df[['brand', 'grp', 'date', 'brewery', 'beer_type']]
f = cluster_df1.groupby(['brewery','date']).sum()
f = f.reset_index()
f = f[f['brewery'] != 'others']
g = f.groupby('date').sum()
g = g.rename(columns={"grp": "grp_sum"})
# pd.set_option('display.max_rows', 10)
h = pd.merge(f,g, on='date', how='left')
h['percentage'] = h['grp'] * 100 / h['grp_sum']
import plotly.graph_objects as go
fig = px.bar(h, y='percentage', x='date', text='percentage', color='brewery')
fig.update_traces(texttemplate='%{text:.2s}', textposition='inside')
fig.show()
cluster_df1 = cluster_df[['brand', 'grp', 'date', 'brewery', 'beer_type']]
i = cluster_df1.groupby(['beer_type', 'brewery','date']).sum()
i = i.reset_index()
i = i[i['brewery'] != 'others']
k = pd.merge(i,g, on='date', how='left')
# Constellation Brand
import plotly.graph_objects as go
fig = px.bar(cluster_df[cluster_df['brewery'] == 'Constellation Brands'], y='grp', x='date', color='beer_type',
color_discrete_map={
'seltzer': 'red',
'lager': 'royalblue',
'malt': 'purple',
'light_beer': 'mediumseagreen'})
fig.update_layout(title_text='Shift in SOV (Constellation Brands)')
fig.show()
# Heineken
import plotly.graph_objects as go
fig = px.bar(cluster_df[cluster_df['brewery'] == 'Heineken'], y='grp', x='date', color='beer_type',
color_discrete_map={
'seltzer': 'red',
'lager': 'royalblue',
'malt': 'purple',
'light_beer': 'mediumseagreen',
'hard_cider': 'orange'})
fig.update_layout(title_text='Shift in SOV (Heineken)')
fig.show()
# Molson Coors
import plotly.graph_objects as go
fig = px.bar(cluster_df[cluster_df['brewery'] == 'Molson Coors'], y='grp', x='date', color='beer_type',
color_discrete_map={
'seltzer': 'red',
'lager': 'royalblue',
'malt': 'purple',
'light_beer': 'mediumseagreen',
'hard_cider': 'orange'})
fig.update_layout(title_text='Shift in SOV (Molson Coors)')
fig.show()
# Boston Beer Company
import plotly.graph_objects as go
fig = px.bar(cluster_df[cluster_df['brewery'] == 'Boston Beer Company'], y='grp', x='date', color='beer_type',
color_discrete_map={
'seltzer': 'red',
'lager': 'royalblue',
'malt': 'purple',
'light_beer': 'mediumseagreen',
'hard_cider': 'orange'})
fig.update_layout(title_text='Shift in SOV (Boston Beer Company)')
fig.show()
# Diageo
import plotly.graph_objects as go
fig = px.bar(cluster_df[cluster_df['brewery'] == 'Diageo'], y='grp', x='date', color='beer_type',
color_discrete_map={
'seltzer': 'red',
'lager': 'royalblue',
'malt': 'purple',
'light_beer': 'mediumseagreen',
'hard_cider': 'orange'})
fig.update_layout(title_text='Shift in SOV (Diageo)')
fig.show()
# Constellation Brands
import plotly.graph_objects as go
fig = px.bar(cluster_df[cluster_df['brewery'] == 'Constellation Brands'], y='grp', x='date', color='brand')
fig.show()
# Heineken
import plotly.graph_objects as go
fig = px.bar(cluster_df[cluster_df['brewery'] == 'Heineken'], y='grp', x='date', color='brand')
fig.show()
# Molson Coors
import plotly.graph_objects as go
fig = px.bar(cluster_df[cluster_df['brewery'] == 'Molson Coors'], y='grp', x='date', color='brand')
fig.show()
# Boston Beer Company
import plotly.graph_objects as go
fig = px.bar(cluster_df[cluster_df['brewery'] == 'Boston Beer Company'], y='grp', x='date', color='brand')
fig.show()
# Diageo'
import plotly.graph_objects as go
fig = px.bar(cluster_df[cluster_df['brewery'] == 'Diageo'], y='grp', x='date', color='beer_type')
fig.show()
#Replace NaN values with 0
c0= co.fillna(0)
#Index date column
c0['ds']=pd.DatetimeIndex(c0['date'])
#Corona
corona =c0[['date', 'CORONA EXTRA BEER']]
corona.columns = ['ds', 'y'] #Need to rename the columns
#Split the data
train=corona.iloc[:-12]#except the last 30 values as training set
test=corona.iloc[-12:] #last 30 values as testing values
print('train shape', train.shape)
print('test shape', test.shape)
# set the date into index
train.set_index('ds', inplace=True)
test.set_index('ds', inplace=True)
#Set the Index for 2 years
index_years = pd.date_range(train.index[-1], freq='MS', periods = 19) #MS= month start
index_years
from pmdarima import auto_arima
#autoarima function: try different combinations of orders and each model will assign a score AIC.
#lower the AIC is better
stepwise_fit = auto_arima(corona['y'], trace=True,
suppress_warnings=True)
#we can test different p,d,q values in future but it can take too long
#1: Running ARIMA with best model
from statsmodels.tsa.arima_model import ARIMA
model_arima = ARIMA(train, order=(2,0,1)) # Best model
model_arima_fit = model_arima.fit(disp=-1)
# Saving ARIMA predictions
fcast1 = model_arima_fit.forecast(19)[0]
# Passing the same index as the others
fcast1 = pd.Series(fcast1, index=index_years)
fcast1 = fcast1.rename("Arima")
##################################################
#2: Running auto ARIMA
import pmdarima as pm
auto_arima_model = pm.auto_arima(train, seasonal=False, m=19)
# Read more about setting m
# https://alkaline-ml.com/pmdarima/tips_and_tricks.html
# make your forecasts
fcast2 = auto_arima_model.predict(19)
fcast2 = pd.Series(fcast2, index=index_years)
fcast2 = fcast2.rename("Auto Arima")
#################################################
#3: Running fbprophet
from pandas import to_datetime
#define the model
model = Prophet()
# fit the model
model.fit(corona)
# define the period for which we want a prediction
df_index_years = pd.DataFrame(index_years)
df_index_years.columns = ['ds']
df_index_years['ds']= to_datetime(df_index_years['ds'])
# use the model to make a forecast
fcast3 = model.predict(df_index_years)
fcast3 = pd.Series(fcast3['yhat'].values, index=index_years)
fcast3 = fcast3.rename("Prophet")
# Ploting the predictions
fig, ax = plt.subplots(figsize=(15,5))
chart = sns.lineplot(x='ds', y='y', data = corona)
chart.set_title('Corona')
fcast1.plot(ax=ax, color='red', marker="o", legend=True)
fcast2.plot(ax=ax, color='green', marker="o", legend=True)
fcast3.plot(ax=ax, color='orange', marker="o", legend=True)
test.plot(ax=ax, color='blue', marker="o", legend=True)
modelo =c0[['date', 'MODELO ESPECIAL BEER']]
modelo.columns = ['ds', 'y'] #Need to rename the columns
#modelo =modelo.iloc[48:] #only ast 2 years or so
#Split the data
train_m=modelo.iloc[:-12]#from 2019-07-01 to 2020-07-01
test_m=modelo.iloc[-12:] #last 30 values as testing values
# set the date into index
train_m.set_index('ds', inplace=True)
test_m.set_index('ds', inplace=True)
#Set the Index for 2 years
index_m = pd.date_range(train_m.index[-1], freq='MS', periods = 19) #MS= month start
index_m
stepwise_fit = auto_arima(modelo['y'], trace=True,
suppress_warnings=True)
####### Running ARIMA with best model
modelo_arima = ARIMA(train_m, order=(4,1,3)) # Best model
modelo_arima_fit = modelo_arima.fit(disp=-1)
# Saving ARIMA predictions
fcast1_m = modelo_arima_fit.forecast(19)[0]
# Passing the same index as the others
fcast1_m = pd.Series(fcast1_m, index=index_m)
fcast1_m = fcast1_m.rename("Arima")
####### Auto ARIMA
# Running auto ARIMA
auto_arima_modelo = pm.auto_arima(train_m, seasonal=False, m=19)
# make your forecasts
fcast2_m = auto_arima_modelo.predict(19)
fcast2_m = pd.Series(fcast2_m, index=index_m)
fcast2_m = fcast2_m.rename("Auto Arima")
####### fbProphet
from pandas import to_datetime
# define the model
prophet_modelo = Prophet()
# fit the model
prophet_modelo.fit(modelo)
# define the period for which we want a prediction
df_m = pd.DataFrame(index_m)
df_m.columns = ['ds']
df_m['ds']= to_datetime(df_m['ds'])
# use the model to make a forecast
fcast3_m = prophet_modelo.predict(df_m)
fcast3_m = pd.Series(fcast3_m['yhat'].values, index=index_m)
fcast3_m = fcast3_m.rename("Prophet")
# Ploting the predictions
fig, ax = plt.subplots(figsize=(15,5))
chart = sns.lineplot(x='ds', y='y', data = modelo)
chart.set_title('Modelo')
fcast1_m.plot(ax=ax, color='red', marker="o", legend=True)
fcast2_m.plot(ax=ax, color='green', marker="o", legend=True)
fcast3_m.plot(ax=ax, color='orange', marker="o", legend=True)
test_m.plot(ax=ax, color='blue', marker="o", legend=True)
heineken =c0[['date', 'HEINEKEN BEER']]
heineken.columns = ['ds', 'y'] #Need to rename the columns
heineken =heineken.iloc[54:]
#Split the data
train_m=heineken.iloc[:-12]#from 2019-07-01 to 2020-07-01
test_m=heineken.iloc[-12:] #last 30 values as testing values
# set the date into index
train_m.set_index('ds', inplace=True)
test_m.set_index('ds', inplace=True)
#Set the Index for 2 years
index_m = pd.date_range(train_m.index[-1], freq='MS', periods = 19) #MS= month start
index_m
stepwise_fit = auto_arima(heineken['y'], trace=True,
suppress_warnings=True)
####### Running ARIMA with best model
modelo_arima = ARIMA(train_m, order=(0,0,1)) # Best model
modelo_arima_fit = modelo_arima.fit(disp=-1)
# Saving ARIMA predictions
fcast1_m = modelo_arima_fit.forecast(19)[0]
# Passing the same index as the others
fcast1_m = pd.Series(fcast1_m, index=index_m)
fcast1_m = fcast1_m.rename("Arima")
####### Auto ARIMA
# Running auto ARIMA
auto_arima_modelo = pm.auto_arima(train_m, seasonal=False, m=19)
# make your forecasts
fcast2_m = auto_arima_modelo.predict(19)
fcast2_m = pd.Series(fcast2_m, index=index_m)
fcast2_m = fcast2_m.rename("Auto Arima")
####### fbProphet
from pandas import to_datetime
# define the model
prophet_modelo = Prophet()
# fit the model
prophet_modelo.fit(heineken)
# define the period for which we want a prediction
df_m = pd.DataFrame(index_m)
df_m.columns = ['ds']
df_m['ds']= to_datetime(df_m['ds'])
# use the model to make a forecast
fcast3_m = prophet_modelo.predict(df_m)
fcast3_m = pd.Series(fcast3_m['yhat'].values, index=index_m)
fcast3_m = fcast3_m.rename("Prophet")
# Ploting the predictions
fig, ax = plt.subplots(figsize=(15,5))
chart = sns.lineplot(x='ds', y='y', data = heineken)
chart.set_title('Heineken')
fcast1_m.plot(ax=ax, color='red', marker="o", legend=True)
fcast2_m.plot(ax=ax, color='green', marker="o", legend=True)
fcast3_m.plot(ax=ax, color='orange', marker="o", legend=True)
test_m.plot(ax=ax, color='blue', marker="o", legend=True)
coors =c0[['date', 'COORS LIGHT BEER']]
coors.columns = ['ds', 'y'] #Need to rename the columns
coors =coors.iloc[54:]
#Split the data
train_m=coors.iloc[:-12]#from 2019-07-01 to 2020-07-01
test_m=coors.iloc[-12:] #last 30 values as testing values
# set the date into index
train_m.set_index('ds', inplace=True)
test_m.set_index('ds', inplace=True)
#Set the Index for 2 years
index_m = pd.date_range(train_m.index[-1], freq='MS', periods = 19) #MS= month start
index_m
stepwise_fit = auto_arima(coors['y'], trace=True,
suppress_warnings=True)
####### Running ARIMA with best model
modelo_arima = ARIMA(train_m, order=(0,1,0)) # Best model is ARIMA(0,0,1) for 2 years or so
modelo_arima_fit = modelo_arima.fit(disp=-1)
# Saving ARIMA predictions
fcast1_m = modelo_arima_fit.forecast(19)[0]
# Passing the same index as the others
fcast1_m = pd.Series(fcast1_m, index=index_m)
fcast1_m = fcast1_m.rename("Arima")
####### Auto ARIMA
# Running auto ARIMA
auto_arima_modelo = pm.auto_arima(train_m, seasonal=False, m=19)
# make your forecasts
fcast2_m = auto_arima_modelo.predict(19)
fcast2_m = pd.Series(fcast2_m, index=index_m)
fcast2_m = fcast2_m.rename("Auto Arima")
####### fbProphet
from pandas import to_datetime
# define the model
prophet_modelo = Prophet()
# fit the model
prophet_modelo.fit(coors)
# define the period for which we want a prediction
df_m = pd.DataFrame(index_m)
df_m.columns = ['ds']
df_m['ds']= to_datetime(df_m['ds'])
# use the model to make a forecast
fcast3_m = prophet_modelo.predict(df_m)
fcast3_m = pd.Series(fcast3_m['yhat'].values, index=index_m)
fcast3_m = fcast3_m.rename("Prophet")
# Ploting the predictions
fig, ax = plt.subplots(figsize=(15,5))
chart = sns.lineplot(x='ds', y='y', data = coors)
chart.set_title('COORS LIGHT BEER')
fcast1_m.plot(ax=ax, color='red', marker="o", legend=True)
fcast2_m.plot(ax=ax, color='green', marker="o", legend=True)
fcast3_m.plot(ax=ax, color='orange', marker="o", legend=True)
test_m.plot(ax=ax, color='blue', marker="o", legend=True)
miller =c0[['date', 'MILLER LITE BEER']]
miller.columns = ['ds', 'y'] #Need to rename the columns
miller =miller.iloc[54:]
#Split the data
train_m=miller.iloc[:-12]#from 2019-07-01 to 2020-07-01
test_m=miller.iloc[-12:] #last 30 values as testing values
# set the date into index
train_m.set_index('ds', inplace=True)
test_m.set_index('ds', inplace=True)
#Set the Index for 2 years
index_m = pd.date_range(train_m.index[-1], freq='MS', periods = 19) #MS= month start
index_m
stepwise_fit = auto_arima(miller['y'], trace=True,
suppress_warnings=True)
####### Running ARIMA with best model
modelo_arima = ARIMA(train_m, order=(2,0,0)) # Best model is ARIMA(0,0,1) for 2 years or so
modelo_arima_fit = modelo_arima.fit(disp=-1)
# Saving ARIMA predictions
fcast1_m = modelo_arima_fit.forecast(19)[0]
# Passing the same index as the others
fcast1_m = pd.Series(fcast1_m, index=index_m)
fcast1_m = fcast1_m.rename("Arima")
####### Auto ARIMA
# Running auto ARIMA
auto_arima_modelo = pm.auto_arima(train_m, seasonal=False, m=19)
# make your forecasts
fcast2_m = auto_arima_modelo.predict(19)
fcast2_m = pd.Series(fcast2_m, index=index_m)
fcast2_m = fcast2_m.rename("Auto Arima")
####### fbProphet
from pandas import to_datetime
# define the model
prophet_modelo = Prophet()
# fit the model
prophet_modelo.fit(miller)
# define the period for which we want a prediction
df_m = pd.DataFrame(index_m)
df_m.columns = ['ds']
df_m['ds']= to_datetime(df_m['ds'])
# use the model to make a forecast
fcast3_m = prophet_modelo.predict(df_m)
fcast3_m = pd.Series(fcast3_m['yhat'].values, index=index_m)
fcast3_m = fcast3_m.rename("Prophet")
# Ploting the predictions
fig, ax = plt.subplots(figsize=(15,5))
chart = sns.lineplot(x='ds', y='y', data = miller)
chart.set_title('MILLER LITE BEER')
fcast1_m.plot(ax=ax, color='red', marker="o", legend=True)
fcast2_m.plot(ax=ax, color='green', marker="o", legend=True)
fcast3_m.plot(ax=ax, color='orange', marker="o", legend=True)
test_m.plot(ax=ax, color='blue', marker="o", legend=True)
seltzer = df.loc[:, df.columns.isin(
['date','TRULY SPIKED AND SPARKLING ALCOHOLIC BEVERAGE', 'CORONA HARD SELTZER','HIGH NOON SUN SIPS',
'SMIRNOFF HARD SELTZER','MIKES HARD LEMONADE SELTZER','WHITE CLAW HARD SELTZER ICED TEA', 'WHITE CLAW HARD SELTZER',
'CAPE LINE BEVERAGES-ALCOHOLIC','COORS HARD SELTZER','VIZZY HARD SELTZER'])]
#Replace NaN values with 0
s0= seltzer.fillna(0)
s0['total'] = s0.sum(axis=1)
s0.tail()
#Index date column
s0['ds']=pd.DatetimeIndex(s0['date'])
s0 =s0[['ds','total']]
s0.columns = ['ds', 'y'] #Need to rename the columns
s0 =s0.iloc[54:]
s0
#Split the data
train=s0.iloc[:-12]#from 2019-07-01 to 2020-07-01
test=s0.iloc[-12:] #last 30 values as testing values
print('train shape', train.shape)
print('test shape', test.shape)
# set the date into index
train.set_index('ds', inplace=True)
test.set_index('ds', inplace=True)
#Set the Index for 2 years
index_years = pd.date_range(train.index[-1], freq='MS', periods = 19) #MS= month start
index_years
stepwise_fit = auto_arima(s0['y'], trace=True,
suppress_warnings=True)
####### Running ARIMA with best model
modelo_arima = ARIMA(train, order=(1,0,0)) # Best model for 2 years or so
modelo_arima_fit = modelo_arima.fit(disp=-1)
# Saving ARIMA predictions
fcast1_m = modelo_arima_fit.forecast(19)[0]
# Passing the same index as the others
fcast1_m = pd.Series(fcast1_m, index=index_years)
fcast1_m = fcast1_m.rename("Arima")
####### Auto ARIMA
# Running auto ARIMA
auto_arima_modelo = pm.auto_arima(train, seasonal=False, m=19)
# make your forecasts
fcast2_m = auto_arima_modelo.predict(19)
fcast2_m = pd.Series(fcast2_m, index=index_years)
fcast2_m = fcast2_m.rename("Auto Arima")
####### fbProphet
from pandas import to_datetime
# define the model
prophet_modelo = Prophet()
# fit the model
prophet_modelo.fit(s0)
# define the period for which we want a prediction
df_m = pd.DataFrame(index_years)
df_m.columns = ['ds']
df_m['ds']= to_datetime(df_m['ds'])
# use the model to make a forecast
fcast3_m = prophet_modelo.predict(df_m)
fcast3_m = pd.Series(fcast3_m['yhat'].values, index=index_years)
fcast3_m = fcast3_m.rename("Prophet")
# Ploting the predictions
fig, ax = plt.subplots(figsize=(15,5))
chart = sns.lineplot(x='ds', y='y', data = s0)
chart.set_title('Seltzer')
fcast1_m.plot(ax=ax, color='red', marker="o", legend=True)
fcast2_m.plot(ax=ax, color='green', marker="o", legend=True)
fcast3_m.plot(ax=ax, color='orange', marker="o", legend=True)
test.plot(ax=ax, color='blue', marker="o", legend=True)